Libraries

library(rgdal)
library(ggplot2)
library(sf)
library(tidyverse)
library(ggmap)

Download the data

# Download a snapshot of the repository as a zip archive and extract it into
# the current working directory.
url <- "https://github.com/ConnorCheng2/Data-Science-for-Good/archive/refs/heads/master.zip"
tmp <- tempfile(fileext = ".zip")

# mode = "wb" requests a binary transfer explicitly, so the archive cannot be
# damaged by line-ending translation on Windows. download.file() returns a
# non-zero status when the transfer fails; stop instead of unzipping a
# broken or missing file.
if (download.file(url, destfile = tmp, mode = "wb") != 0) {
  stop("Download failed: ", url, call. = FALSE)
}

unzip(tmp)

# The archive is no longer needed once its contents have been extracted.
unlink(tmp)

Working directory and setting the correct path

Το path πρέπει να το ορίσει ο κάθε χρήστης σωστά, σύμφωνα με το working directory στο οποίο δουλεύει. Το δικό μου είναι αυτό:

# Show the current working directory; the relative paths used in the rest of
# the document are resolved against it.
getwd()
## [1] "D:/DataAskiseis/Essay"

οπότε τα αρχεία που χρειαζόμαστε θα είναι μέσα στον φάκελο “cpe-data” με path = “../Essay/Data-Science-for-Good-master/cpe-data”

See the files

Τα δεδομένα αυτά αφορούν παραβιάσεις και εγκλήματα που έχουν καταγραφεί από διάφορα αστυνομικά τμήματα στην Αμερική.

# List the top-level entries of the extracted "cpe-data" folder.
list.files(path = "../Essay/Data-Science-for-Good-master/cpe-data")
##  [1] "ACS_variable_descriptions.csv" "Dept_11-00091"                
##  [3] "Dept_23-00089"                 "Dept_24-00013"                
##  [5] "Dept_24-00098"                 "Dept_35-00016"                
##  [7] "Dept_35-00103"                 "Dept_37-00027"                
##  [9] "Dept_37-00049"                 "Dept_49-00009"                
## [11] "Dept_49-00033"                 "Dept_49-00035"                
## [13] "Dept_49-00081"

Read the shape file

Μπορούμε να χρησιμοποιήσουμε τα δεδομένα από οποιοδήποτε τμήμα επιθυμούμε. Εδώ θα χρησιμοποιήσουμε τα δεδομένα από το τμήμα “Dept_37-00027”.

# List the files in the shapefile folder of department "Dept_37-00027" and
# render the file names as a table.
knitr::kable(list.files(path = "../Essay/Data-Science-for-Good-master/cpe-data/Dept_37-00027/37-00027_Shapefiles"))
x
APD_DIST.dbf
APD_DIST.sbn
APD_DIST.sbx
APD_DIST.shp
APD_DIST.shx
# Read the APD_DIST shapefile with rgdal::readOGR() and keep the resulting
# spatial object in `shp`.
# NOTE(review): rgdal has been retired from CRAN; sf::st_read() (sf is
# already loaded) is the usual replacement, but it returns a different class,
# so the later `shp@data` and fortify(shp) calls would need adapting - confirm
# before migrating.
shp <- readOGR(dsn = "../Essay/Data-Science-for-Good-master/cpe-data/Dept_37-00027/37-00027_Shapefiles/APD_DIST.shp")
## OGR data source with driver: ESRI Shapefile 
## Source: "D:\DataAskiseis\Essay\Data-Science-for-Good-master\cpe-data\Dept_37-00027\37-00027_Shapefiles\APD_DIST.shp", layer: "APD_DIST"
## with 53 features
## It has 19 fields

Class of our data

# Inspect the class of the object returned by readOGR().
class(shp)
## [1] "SpatialPolygonsDataFrame"
## attr(,"package")
## [1] "sp"

Names of our data

# List the attribute field names carried by the spatial object.
names(shp)
##  [1] "NAME"       "SORTORDER"  "BATID"      "JURIID"     "COLOR"     
##  [6] "CODE"       "EXTERNALKE" "BATTALIONC" "DISTRICT"   "SECTOR"    
## [11] "INPUT_DATE" "MODIFIED_D" "INPUT_BY"   "MODIFIED_B" "BUREAU"    
## [16] "PATROL_ARE" "AREACOMMAN" "SHAPE_AREA" "SHAPE_LEN"

Summary of our data

# Summarise every column of the attribute table held in the `data` slot.
summary(shp@data)
##      NAME             SORTORDER          BATID         JURIID      
##  Length:53          Min.   :0.0000   Min.   : 21   Min.   :  6.00  
##  Class :character   1st Qu.:0.0000   1st Qu.:164   1st Qu.: 12.00  
##  Mode  :character   Median :1.0000   Median :205   Median : 15.00  
##                     Mean   :0.6981   Mean   :253   Mean   : 53.38  
##                     3rd Qu.:1.0000   3rd Qu.:307   3rd Qu.:121.00  
##                     Max.   :1.0000   Max.   :520   Max.   :145.00  
##      COLOR              CODE            EXTERNALKE         BATTALIONC       
##  Min.   :   32768   Length:53          Length:53          Length:53         
##  1st Qu.:16711680   Class :character   Class :character   Class :character  
##  Median :16711808   Mode  :character   Mode  :character   Mode  :character  
##  Mean   :14705527                                                           
##  3rd Qu.:16744576                                                           
##  Max.   :16776960                                                           
##    DISTRICT            SECTOR           INPUT_DATE         MODIFIED_D       
##  Length:53          Length:53          Length:53          Length:53         
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##    INPUT_BY          MODIFIED_B           BUREAU           PATROL_ARE       
##  Length:53          Length:53          Length:53          Length:53         
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##   AREACOMMAN          SHAPE_AREA          SHAPE_LEN       
##  Length:53          Min.   :    13987   Min.   :   478.7  
##  Class :character   1st Qu.: 13071085   1st Qu.: 19773.8  
##  Mode  :character   Median : 88304929   Median : 48972.5  
##                     Mean   :144632577   Mean   : 82607.7  
##                     3rd Qu.:189079669   3rd Qu.: 94701.7  
##                     Max.   :712649906   Max.   :453215.1

Convert Shape File into a dataframe

# Convert the spatial object into a plain data frame so that ggplot() can use
# it as `data` (the plot further down reads its `long`, `lat` and `group`
# columns).
# NOTE(review): fortify() on sp objects is reported as deprecated in recent
# ggplot2 releases - confirm before upgrading packages.
shp_df <- fortify(shp)
## Regions defined for each Polygons
# Check the class of the converted object.
class(shp_df)
## [1] "data.frame"

Plot a plain map

# Base map: draw each polygon of the converted shapefile in light grey on an
# equal-scale coordinate system, and keep the plot in `shpMap` so that more
# layers can be added to it later.
# NOTE(review): the SHAPE_AREA / SHAPE_LEN magnitudes suggest projected
# coordinates rather than degrees - confirm that the axis labels are accurate.
shpMap <- ggplot(shp_df, aes(x = long, y = lat)) +
  geom_polygon(mapping = aes(group = group), fill = "grey80") +
  coord_equal() +
  labs(
    title = "Map ",
    subtitle = "Map - Based on the Lat Long in Shape Files",
    x = "Longitude (Degrees)",
    y = "Latitude (Degrees)"
  )

# Render the base map.
shpMap

Read the csv file from “Dept_37-00027”

# Read the prepped CSV of department 37-00027 into `myData`.
# skip = 1 discards the first line of the file, so the line after it supplies
# the column names (e.g. `Nature of Contact`, `X-Coordinate`) that the later
# chunks refer to.
myData <- read_csv("../Essay/Data-Science-for-Good-master/cpe-data/Dept_37-00027/37-00027_UOF-P_2014-2016_prepped.csv", skip = 1)

Look at the csv file

Βλέπουμε ότι τα δεδομένα αυτά είναι για την πόλη Austin που βρίσκεται στην πολιτεία του Texas.

# Show columns 26-27 of the first record as a table.
knitr::kable(myData[1, 26:27])
City State
Austin TX

Οι πρώτες 4 γραμμές και οι στήλες 4 έως 8 του csv αρχείου φαίνονται παρακάτω:

# Preview rows 1-4 and columns 4-8 of the data as a table.
knitr::kable(myData[seq_len(4), 4:8])
Area Command Nature of Contact Reason Desc Master Subject ID Subject Sex
FR VIEWED OFFENSE NECESSARY TO EFFECT ARREST / DETENTION 167510327: 2015541517 M
GE VIEWED OFFENSE NECESSARY TO EFFECT ARREST / DETENTION 459191174: 20151510003 M
GE VIEWED OFFENSE IN CUSTODY, MAINTAINING CONTROL 459191174: 20151510003 M
HE TRAFFIC STOP NECESSARY TO DEFEND REPORTING OFFICER 198377769: 2014111929 M

Count the crimes and rename some columns with long names

# Count the records per coordinate pair, most frequent first.
# count() groups, tallies and sorts in a single step and returns an ungrouped
# tibble, so no grouping leaks into later code. Naming the grouping
# expressions (x, y) gives the short column names directly instead of
# renaming the columns by position afterwards.
crimes <- myData %>%
  count(x = `X-Coordinate`, y = `Y-Coordinate`, sort = TRUE, name = "n") %>%
  drop_na()

# Keep only the locations that occur more than once.
crimes_gt_1 <- crimes %>%
  filter(n > 1)

Draw the map with the crimes on it

# Overlay the repeated locations on the base map, with point size showing the
# number of records at each location.
# Only `size` varies with the data, so it is the only aesthetic (besides x/y)
# mapped inside aes(). Alpha and colour are constants and are set outside
# aes(); inside aes() they would be treated as data values and replaced by
# default scale output, so the points would not be drawn in "#fff000" at 0.8
# opacity.
shpMap +
  geom_point(
    data = crimes_gt_1,
    mapping = aes(x = x, y = y, size = n),
    alpha = 0.8,
    color = "#fff000"
  ) +
  labs(title = "Crime Occurences more than Once") +
  theme(legend.position = "none")

Better map

Ένας πιο όμορφος χάρτης του Austin με τα “VIEWED OFFENSE”, “TRAFFIC STOP”, “TACTICAL OPERATION”, “WARRANT SERVICE” καταγεγραμμένα πάνω σε αυτόν

# Keep only the records whose nature of contact is one of the four selected
# categories.
ts_vo <- filter(
  myData,
  `Nature of Contact` %in% c(
    "VIEWED OFFENSE",
    "TRAFFIC STOP",
    "TACTICAL OPERATION",
    "WARRANT SERVICE"
  )
)

# Contact category as a factor; it is passed to qmplot() as the colour
# variable (presumably named this way so the legend is titled after the city).
City_of_Austin <- as.factor(ts_vo[["Nature of Contact"]])

# Use qmplot() to draw the records on a watercolor basemap, coloured by
# contact category.
qmplot(
  Longitude,
  Latitude,
  data = ts_vo,
  maptype = "watercolor",
  color = City_of_Austin
)